Introduction

The purpose of this workflow is to identify aging signatures by comparing gene expression at 12 months and 4 months in WT/WT mice.

We can then look for accelerated or delayed versions of this in the various Kl variants.

rm(list = ls())

library(here)

results.dir <- here("Results", "Aging")
if(!file.exists(results.dir)){dir.create(results.dir)}

general.data.dir <- here("Data", "general")

geno.cols <- c("WT/WT" = "darkgray", "WT/VS" = "#a6bddb", "VS/VS" = "#2b8cbe", "WT/FC" = "#a1d99b", "FC/FC" = "#31a354")
all.fun <- list.files(here("Code"), full.names = TRUE)
for(i in 1:length(all.fun)){source(all.fun[i])}

group.processed.data.dir <- here("Results", "all", "processed_data")
general.processed.data.dir <- here("Results", "Processed_Data")
needed.libraries <- c("pheatmap", "DT", "gprofiler2")
load_libraries(needed.libraries)
#gene information tables
mouse.gene.table <- read.table(file.path(general.data.dir, "mouse_gene_info.txt"), sep = "\t", header = TRUE)
scaled.expr <- readRDS(file.path(group.processed.data.dir, paste0("Scaled_Expression.RDS")))
covar.table <- read.csv(file.path(group.processed.data.dir, "mouse_info.csv"))

We want to identify an aging signature in the WT mice. We can then look in the different Kl genotypes to see if there is an acceleration or delay of aging based on Kl genotype.

We fit a linear model to explain gene expression using age. The following plot shows the

#pull out wt mice
wt.idx <- which(covar.table[,"genotype"] == "WT/WT")
u_age <- sort(unique(covar.table[,"age_batch"]))
wt_age <- lapply(u_age, function(x) intersect(wt.idx, which(covar.table[,"age_batch"] == x)))
group_id <- lapply(wt_age, function(x) if(length(x) > 0){rownames(covar.table)[x]})
names(group_id) <- u_age

#adjust expression by non-age covariates (sex, batch)
dummy.covar <- dummy_covar(covar.table[,c("sequencingBatch", "sex_ge")])
adj_expr <- adjust(t(scaled.expr), dummy.covar)

wt.id <- unlist(group_id)
#fit a model in the WT mice to gene expression with age as the explanatory variable
age.test <- apply(adj_expr[wt.id,], 2, function(x) lm(x~covar.table[wt.id,"age_batch"]))
age.p  <- sapply(age.test, function(x) coef(summary(x))[2,"Pr(>|t|)"])
age.effect <- sapply(age.test, function(x) coef(summary(x))[2,"Estimate"])

The following plots show the p value distribution and the volcano plot for age-related changes in gene expression in the WT mice.

fdr.thresh = 0.2

Red dots indicate genes that are significantly differentially expressed with age at an FDR of 0.2.

adj.p <- p.adjust(age.p, "fdr")

sig.idx <- which(adj.p <= fdr.thresh)
sig.col <- rep("black", length(adj.p))
sig.col[sig.idx] <- "red"

par(mfrow = c(1,2))
qqunif.plot(age.p)
plot(age.effect, -log10(age.p), col = sig.col, pch = 16)

A table of these genes is shown below.

gene.id <- colnames(adj_expr)[sig.idx]
gene.idx <- match(gene.id, mouse.gene.table[,"ensembl_gene_id"])
diff.gene <- cbind(mouse.gene.table[gene.idx,], signif(age.effect[gene.id],2), 
    signif(age.p[gene.id], 2), signif(adj.p[gene.id], 2))
colnames(diff.gene)[c(7:9)] <- c("effect", "p", "p_adjusted")
datatable(diff.gene)

The following tables show functional enrichments for up-, down-, and any differentially expressed gene.

up_idx <- which(diff.gene[,"effect"] > 0)
down_idx <- which(diff.gene[,"effect"] < 0)
up_enrich <- gost(diff.gene[up_idx, "ensembl_gene_id"], organism = "mmusculus", 
    sources = c("GO", "KEGG", "REACTOME", "HP", "CORUM"))
down_enrich <- gost(diff.gene[down_idx, "ensembl_gene_id"], organism = "mmusculus", 
    sources = c("GO", "KEGG", "REACTOME", "HP", "CORUM"))
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
all_enrich <- gost(diff.gene[, "ensembl_gene_id"], organism = "mmusculus", 
    sources = c("GO", "KEGG", "REACTOME", "HP", "CORUM"))

plot.enrichment(up_enrich, plot.label = "Up-regulated with age")

plot.enrichment(down_enrich, plot.label = "Down-regulated with age")

## NULL
plot.enrichment(all_enrich, plot.label = "Different with age")

The following heat map shows gene expression for the genes that were differentially expressed by age.

diff.expr <- adj_expr[wt.id,gene.id]
age.df <- data.frame("age" = as.factor(covar.table[wt.id,"age_batch"]))
rownames(age.df) <- wt.id
colnames(diff.expr) <- diff.gene[,"external_gene_name"]
pheatmap(t(diff.expr), show_colnames = FALSE, annotation_col = age.df)

The following plot shows the decomposition of the expression matrix in the wild type animals. Each dot is an individual, and color indicates the age of the individual.

plot.decomp(diff.expr, cols = as.numeric(as.factor(covar.table[wt.idx,"age_batch"])))
legend("topright", legend = unique(covar.table[wt.id,"age_batch"]), col = c(1,2), pch = 16)

The following plot shows the decomposition of the expression matrix, where each dot is a gene. Genes are colored by whether they are up-regulated with aging (red) or downregulated with age (blue).

wt.diff.decomp <- plot.decomp(t(diff.expr), label.points = TRUE, cols = colors.from.values(diff.gene[,"effect"], 
    use.pheatmap.colors = TRUE), main = "Age-Related Genes", xlim = c(-0.2, 0.45), label.cex = 0.7)

We then looked at the expression of these genes in the Kl variant carriers.

Gene expression of these genes do a good job separating all genotypes by age. We show this in two ways. First by the decomposition of the aging gene expression matrix for the group. And second by weighting the individuals based on the loadings from the WT animals to give each animal an aging score. The bars are colored by age group.

kl.carrier.idx <- which(covar.table[,"genotype"] != "WT/WT")
groups <- unique(covar.table[kl.carrier.idx, c("age_batch", "genotype")])
u_geno <- unique(groups[,"genotype"])

cluster_sep <- rep(NA, length(u_geno))
names(cluster_sep) <- u_geno
par(mfrow = c(2,2))
for(g in 1:length(u_geno)){
    geno.expr.idx <- which(covar.table[,"genotype"] == u_geno[g])
    geno.expr <- adj_expr[geno.expr.idx, gene.id]
    colnames(geno.expr) <- diff.gene[,"external_gene_name"]
    
    g.age.df <- data.frame(covar.table[geno.expr.idx,"age_batch"])
    colnames(g.age.df) <- "age"
    rownames(g.age.df) <- rownames(covar.table)[geno.expr.idx]
    #pheatmap(geno.expr, annotation_row = g.age.df)
    age.group <- as.numeric(as.factor(covar.table[geno.expr.idx,"age_batch"]))
    geno.decomp <- plot.decomp(geno.expr, cols = age.group,
        main = u_geno[g])

    ind.score <- (geno.expr)%*%wt.diff.decomp$u[,1,drop=FALSE]
    ind.order <- order(ind.score[,1])
    barplot(ind.score[ind.order,1], col = age.group[ind.order], names = NA,
        ylab = "Aging Score", main = u_geno[g])
    group.age <- unique(covar.table[geno.expr.idx,"age_batch"])
    legend("topleft", legend = group.age, col = c(1:length(group.age)), pch = 16)

    cl.test <- silhouette_coef(geno.decomp$u, membership = age.group)
    cluster_sep[g] <- mean(cl.test, na.rm = TRUE)
    #legend("topright", legend = unique(covar.table[wt.id,"age_batch"]), col = c(1,2), pch = 16)
}

The following barplot shows an estimate of the separation of the groups by age for each genotype based on the differentially expressed genes. It looks as if the FC/FC has the best separation, which could mean that it has an enhanced aging phenotype relative to the other genotypes?

barplot(cluster_sep, beside = TRUE, col = geno.cols[names(cluster_sep)], main = "Cluster separation")

The following plot shows the decomposition of the aging genes for all animals colored by genotype. There doesn’t appear to be any difference based on genotype. In fact the WT/WT animals occupy the extreme values for PC1.

Can we conclude that overall, transcriptional aging is not affected by Kl variant?

test <- adj_expr[,gene.id]
age.decomp <- plot.decomp(test, cols = as.numeric(as.factor(covar.table[,"age_batch"])))

geno.col <- rep("black", nrow(covar.table))
for(g in 1:length(geno.cols)){
    geno.col[which(covar.table[,"genotype"] == names(geno.cols)[g])] <- geno.cols[g]
}
plot(age.decomp$u, col = geno.col, pch = 16, xlab = "PC1", ylab = "PC2")

geno.df <- covar.table[,"genotype"]
pheatmap(test)

Interaction term

We can also look at age-associated genes by testing the interaction between genotype and age. To do this, we used VS/FC carrier status as genotype. So now, instead of looking for age-related genes in the WT animals, we are looking for genes that have main age effects and age-by-genotype interaction effects in the VS and FC carriers (WT mice are excluded here).

The following qq plots show p value distributions for the different pieces of the model.

test.idx <- setdiff(kl.carrier.idx, which(covar.table[,"age_batch"] == 24))
dummy.geno <- rep(0, length(test.idx))
dummy.geno[grep("VS", covar.table[test.idx,"genotype"])] <- 1
dummy.age <- as.numeric(as.factor(covar.table[test.idx,"age_batch"]))
test <- apply(adj_expr[test.idx,], 2, function(x) lm(x~dummy.geno*dummy.age))
test.coef <- t(sapply(test, coef))
test.p <- t(sapply(test, function(x) summary(x)$coefficients[,"Pr(>|t|)"]))

par(mfrow = c(2,2))
for(i in 2:4){
    qqunif.plot(test.p[,i], plot.label = colnames(test.p)[i])
}

The following plot shows how many genes had significant effects from each of the terms. There were more genes that had significant age effects in this group. There were some that had an interaction between age and genotype.

int.p <- test.p[,4]
adj.int.p <- p.adjust(int.p, "fdr")
sig.int <- which(adj.int.p <= fdr.thresh)

sig.any <- apply(test.p, 2, function(x) which(p.adjust(x, "fdr") <= fdr.thresh))
num.sig <- sapply(sig.any, length)
barplot_with_num(num.sig[2:4])

The interaction plots for these genes are shown below.

int.labels <- paste(rep(c("4mo", "12mo"), 2), rep(c("FC", "VS"), each = 2))
groups <- unique(covar.table[test.idx,c("age_batch", "genotype")])
par(mfrow = c(2,3))
for(i in 1:length(sig.any[[4]])){
    gene.id <- names(sig.any[[4]])[i]
    gene.name <- mouse.gene.table[which(mouse.gene.table[,"ensembl_gene_id"] == gene.id), "external_gene_name"]
    a <- boxplot(adj_expr[test.idx,names(sig.any[[4]])[i]]~dummy.age*dummy.geno, main = gene.name,
        names = int.labels, xlab = "", ylab = "Expression")
    #interaction.plot(dummy.age, dummy.geno, adj_expr[test.idx,names(sig.any[[4]])[i]], main = gene.name)
}

#cat(names(sig.any[[4]]), sep = "\n")
#mouse.gene.table[match(names(sig.any[[4]]), mouse.gene.table[,"ensembl_gene_id"]), "external_gene_name"]

We also looked at the genes that had a main effect of age in this group. The enrichments shown below are very similar to the age-related genes in the WT animals.

#play the same separation game as above, but with the new list
gene.id <- names(sig.any[[3]])
gene.idx <- match(gene.id, mouse.gene.table[,"ensembl_gene_id"])
diff.gene <- cbind(mouse.gene.table[gene.idx,], signif(test.coef[gene.id,"dummy.age"],2), 
    signif(test.p[gene.id,"dummy.age"], 2), signif(p.adjust(test.p[gene.id,"dummy.age"], "fdr"), 2))
colnames(diff.gene)[c(7:9)] <- c("effect", "p", "p_adjusted")
datatable(diff.gene)
up_idx <- which(diff.gene[,"effect"] > 0)
down_idx <- which(diff.gene[,"effect"] < 0)
up_enrich <- gost(diff.gene[up_idx, "ensembl_gene_id"], organism = "mmusculus", 
    sources = c("GO", "KEGG", "REACTOME", "HP", "CORUM"))
down_enrich <- gost(diff.gene[down_idx, "ensembl_gene_id"], organism = "mmusculus", 
    sources = c("GO", "KEGG", "REACTOME", "HP", "CORUM"))

all_enrich <- gost(diff.gene[, "ensembl_gene_id"], organism = "mmusculus", 
    sources = c("GO", "KEGG", "REACTOME", "HP", "CORUM"))

plot.enrichment(up_enrich, plot.label = "Up-regulated with age")

plot.enrichment(down_enrich, plot.label = "Down-regulated with age")

plot.enrichment(all_enrich, plot.label = "Different with age")

These genes do a similar job of separating the ages overall.

diff.expr <- adj_expr[test.idx,gene.id]
age.df <- data.frame("age" = as.factor(covar.table[test.idx,"age_batch"]))
rownames(age.df) <- rownames(adj_expr)[test.idx]
colnames(diff.expr) <- diff.gene[,"external_gene_name"]
pheatmap(t(diff.expr), show_colnames = FALSE, annotation_col = age.df)

age.col <- as.numeric(as.factor(covar.table[test.idx,"age_batch"]))
plot.decomp(diff.expr, cols = age.col)
legend("topright", legend = unique(covar.table[test.idx,"age_batch"]), col = c(1,2), pch = 16)